Yo !

Block Group Enrichment, Joins, and Aggregation

library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
sf_use_s2(FALSE)
## Spherical geometry (s2) switched off
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(ggplot2)


dc <- read_csv("/Users/andrewpaladino/Downloads/block_groups_dc.csv")
## New names:
## • `` -> `...1`
## Rows: 571 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): attributes.COUNTY, attributes.TRACT, geometry
## dbl (4): ...1, attributes.STATE, attributes.BLKGRP, GIDBG
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
va <- read_csv("/Users/andrewpaladino/Downloads/block_groups_va.csv")
## New names:
## Rows: 5963 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): attributes.COUNTY, attributes.TRACT, geometry dbl (4): ...1,
## attributes.STATE, attributes.BLKGRP, GIDBG
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
md <- read_csv("/Users/andrewpaladino/Downloads/block_groups_md.csv")
## New names:
## Rows: 4079 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): attributes.COUNTY, attributes.TRACT, geometry dbl (4): ...1,
## attributes.STATE, attributes.BLKGRP, GIDBG
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
osm_buildings <- read_csv("/Users/andrewpaladino/Documents/xyzeus/data/building_footprints.csv")
## New names:
## Rows: 325170 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): nodes, tags, name, type, geometry, members dbl (4): ...1, id, longitude,
## latitude
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
noaa_flood_dc <- read_csv("/Users/andrewpaladino/Downloads/noaa_flood_dc.csv")
## New names:
## Rows: 255 Columns: 19
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (10): CZ_NAME_STR, BEGIN_LOCATION, BEGIN_DATE, EVENT_TYPE, STATE_ABBR, C... dbl
## (9): ...1, EVENT_ID, BEGIN_TIME, EPISODE_ID, END_TIME, BEGIN_LON, BEGIN...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
storm_data <- read_delim("/Users/andrewpaladino/Downloads/storm_details_dmv.csv", delim = "|")
## Rows: 78921 Columns: 52
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "|"
## chr (26): STATE, MONTH_NAME, EVENT_TYPE, CZ_TYPE, CZ_NAME, WFO, BEGIN_DATE_T...
## dbl (25): BEGIN_YEARMONTH, BEGIN_DAY, BEGIN_TIME, END_YEARMONTH, END_DAY, EN...
## lgl  (1): CATEGORY
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
bg_total <- bind_rows(dc, va, md)
bg_total_geo <- st_as_sf(bg_total, wkt = "geometry", crs = 4326)
bg_total_geo <- bg_total_geo %>%
  mutate(census_geometry = geometry)
bg_total_geo <- st_make_valid(bg_total_geo)

storm_data <- storm_data %>%
  filter(!is.na(BEGIN_LAT) & !is.na(BEGIN_LON))

storm_data_geo <- st_as_sf(storm_data, coords = c("BEGIN_LON", "BEGIN_LAT"), crs = 4326)
census_stormdata <- st_join(storm_data_geo, bg_total_geo, join = st_within)
## although coordinates are longitude/latitude, st_within assumes that they are
## planar
## New names:
census_stormdata <- census_stormdata %>%
  mutate(
    county_code = paste0(`attributes.STATE`, `attributes.COUNTY`),
    tract_code = paste0(`attributes.STATE`, `attributes.COUNTY`, `attributes.TRACT`)
  )

census_stormdata$BEGIN_DATE_TIME <- as.POSIXct(
  census_stormdata$BEGIN_DATE_TIME,
  format = "%d-%b-%y %H:%M:%S",
  tz = "UTC"
)

############################################################################
##### Mapping State Name and County Names to begin lat / begin long ########
############################################################################


# 1. Create the FIPS to State mapping
fips_to_state <- c(
  `11` = "DISTRICT OF COLUMBIA",
  `24` = "MARYLAND",
  `51` = "VIRGINIA"
)

# 2. Create Virginia, Maryland, and DC counties as dataframes
va_counties <- tibble(
  attributes.COUNTY = c(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29,
                        31, 33, 35, 36, 37, 41, 43, 45, 47, 49, 51, 53, 57, 59, 61,
                        63, 65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
                        93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115, 117,
                        119, 121, 125, 127, 131, 133, 135, 137, 139, 141, 143, 145,
                        147, 149, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171,
                        173, 175, 177, 179, 181, 183, 185, 187, 191, 193, 195, 197,
                        199, 510, 520, 530, 540, 550, 570, 590, 610, 620, 630, 640,
                        650, 660, 670, 680, 683, 690, 700, 710, 720, 730, 735, 740,
                        750, 760, 770, 775, 790, 800, 810, 820, 830, 840),
  County_Name = c(
    "Accomack", "Albemarle", "Alleghany", "Amelia", "Amherst",
    "Appomattox", "Arlington", "Augusta", "Bath", "Bedford",
    "Bland", "Botetourt", "Brunswick", "Buchanan", "Buckingham",
    "Campbell", "Caroline", "Carroll", "Charles City", "Charlotte",
    "Chesterfield", "Clarke", "Craig", "Culpeper", "Cumberland",
    "Dickenson", "Dinwiddie", "Essex", "Fairfax", "Fauquier",
    "Floyd", "Fluvanna", "Franklin", "Frederick", "Giles",
    "Gloucester", "Goochland", "Grayson", "Greene", "Greensville",
    "Halifax", "Hanover", "Henrico", "Henry", "Highland",
    "Isle of Wight", "James City", "King and Queen", "King George",
    "King William", "Lancaster", "Lee", "Loudoun", "Louisa",
    "Lunenburg", "Madison", "Mathews", "Mecklenburg", "Middlesex",
    "Montgomery", "Nelson", "New Kent", "Northampton", "Northumberland",
    "Nottoway", "Orange", "Page", "Patrick", "Pittsylvania",
    "Powhatan", "Prince Edward", "Prince George", "Prince William",
    "Pulaski", "Rappahannock", "Richmond", "Roanoke", "Rockbridge",
    "Rockingham", "Russell", "Scott", "Shenandoah", "Smyth",
    "Southampton", "Spotsylvania", "Stafford", "Surry", "Sussex",
    "Tazewell", "Warren", "Washington", "Westmoreland", "Wise",
    "Wythe", "York", "Alexandria City", "Bristol City", "Buena Vista City",
    "Charlottesville City", "Chesapeake City", "Colonial Heights City",
    "Danville City", "Falls Church City", "Franklin City", "Fredericksburg City",
    "Galax City", "Hampton City", "Harrisonburg City", "Hopewell City",
    "Lynchburg City", "Manassas City", "Martinsville City", "Newport News City",
    "Norfolk City", "Norton City", "Petersburg City", "Poquoson City",
    "Portsmouth City", "Radford City", "Richmond City", "Roanoke City",
    "Salem City", "Staunton City", "Suffolk City", "Virginia Beach City",
    "Waynesboro City", "Williamsburg City", "Winchester City"
  ),
  attributes.STATE = 51
)

md_counties <- tibble(
  attributes.COUNTY = c(1, 3, 5, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
                        33, 35, 37, 39, 41, 43, 45, 47, 510),
  County_Name = c(
    "Allegany", "Anne Arundel", "Baltimore", "Calvert", "Caroline",
    "Carroll", "Cecil", "Charles", "Dorchester", "Frederick",
    "Garrett", "Harford", "Howard", "Kent", "Montgomery",
    "Prince Georges", "Queen Annes", "St. Marys", "Somerset",
    "Talbot", "Washington", "Wicomico", "Worcester", "Baltimore City"
  ),
  attributes.STATE = 24
)

dc_county <- tibble(
  attributes.COUNTY = 1,
  County_Name = "District of Columbia",
  attributes.STATE = 11
)

# 3. Combine all into one lookup
county_lookup <- bind_rows(va_counties, md_counties, dc_county)

# 4. Map State Name
census_stormdata <- census_stormdata %>%
  mutate(STATE_MAPPED = recode(as.character(attributes.STATE), !!!fips_to_state)) %>%
  mutate(attributes.COUNTY = as.numeric(attributes.COUNTY))

# 5. Merge county names
census_stormdata <- census_stormdata %>%
  left_join(county_lookup, by = c("attributes.STATE", "attributes.COUNTY"))

#########################################
###########. solely flooding events #####
#########################################

library(stringr)

# Define regex pattern
flood_keywords <- "flood|flash flood|flooding"

# Filter flood-related events
flood_events <- census_stormdata %>%
  filter(
    str_detect(EVENT_TYPE, regex(flood_keywords, ignore_case = TRUE)) |
    str_detect(EPISODE_NARRATIVE, regex(flood_keywords, ignore_case = TRUE)) |
    str_detect(EVENT_NARRATIVE, regex(flood_keywords, ignore_case = TRUE))
  )


#####################################
###########. aggregate event counts #
#####################################


# By YEAR, GIDBG, and census_geometry
bg_group <- census_stormdata %>%
  group_by(YEAR, GIDBG, census_geometry) %>%
  summarize(event_count = n(), .groups = "drop")

bg_group_flood <- flood_events %>%
  group_by(YEAR, GIDBG, census_geometry) %>%
  summarize(event_count = n(), .groups = "drop")

# Total events per GIDBG and census_geometry
sum_bg_group <- bg_group %>%
  group_by(GIDBG, census_geometry) %>%
  summarize(total_events = sum(event_count), .groups = "drop")

sum_bg_group_flood <- bg_group_flood %>%
  group_by(GIDBG, census_geometry) %>%
  summarize(total_events = sum(event_count), .groups = "drop")

# By YEAR and tract_code
tract_group <- census_stormdata %>%
  group_by(YEAR, tract_code) %>%
  summarize(event_count = n(), .groups = "drop")
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
tract_group_flood <- flood_events %>%
  group_by(YEAR, tract_code) %>%
  summarize(event_count = n(), .groups = "drop")
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
# By YEAR and county_code
county_group <- census_stormdata %>%
  group_by(YEAR, county_code) %>%
  summarize(event_count = n(), .groups = "drop")
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
county_group_flood <- flood_events %>%
  group_by(YEAR, county_code) %>%
  summarize(event_count = n(), .groups = "drop")
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
# By YEAR and STATE
state_group <- census_stormdata %>%
  group_by(YEAR, STATE) %>%
  summarize(event_count = n(), .groups = "drop")
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
state_group_flood <- flood_events %>%
  group_by(YEAR, STATE) %>%
  summarize(event_count = n(), .groups = "drop")
## although coordinates are longitude/latitude, st_union assumes that they are
## planar

County Level Enrichment, Joins, and Aggregation

library(dplyr)
library(readr)
library(sf)
library(stringr)


dc_counties <- read_csv('/Users/andrewpaladino/Downloads/dc_counties.csv') %>%
  mutate(attributes.FUNCSTAT = as.character(attributes.FUNCSTAT))
## New names:
## Rows: 1 Columns: 20
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (8): attributes.MTFCC, attributes.COUNTY, attributes.COUNTYNS, attribut... dbl
## (11): ...1, attributes.OID, attributes.GEOID, attributes.STATE, attribut... lgl
## (1): attributes.FUNCSTAT
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
va_counties <- read_csv('/Users/andrewpaladino/Downloads/virginia_counties.csv') %>%
  mutate(attributes.FUNCSTAT = as.character(attributes.FUNCSTAT))
## New names:
## Rows: 133 Columns: 20
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (9): attributes.MTFCC, attributes.COUNTY, attributes.COUNTYNS, attribut... dbl
## (11): ...1, attributes.OID, attributes.GEOID, attributes.STATE, attribut...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
md_counties <- read_csv('/Users/andrewpaladino/Downloads/md_counties.csv') %>%
  mutate(attributes.FUNCSTAT = as.character(attributes.FUNCSTAT))
## New names:
## Rows: 24 Columns: 20
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (9): attributes.MTFCC, attributes.COUNTY, attributes.COUNTYNS, attribut... dbl
## (11): ...1, attributes.OID, attributes.GEOID, attributes.STATE, attribut...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
county_total <- bind_rows(dc_counties, va_counties, md_counties) %>%
  mutate(geometry = st_as_sfc(geometry, crs = 4326))

county_total_geo <- st_as_sf(county_total, crs = 4326)
county_total_geo$census_geometry <- county_total_geo$geometry
county_stormdata <- st_join(storm_data_geo, county_total_geo, join = st_within)
## although coordinates are longitude/latitude, st_within assumes that they are
## planar
## New names:
flood_keywords <- c("flood", "flash flood", "flooding", "coastal flood")
pattern <- str_c(flood_keywords, collapse = "|")  

flood_events_county <- county_stormdata %>%
  filter(
    str_detect(EVENT_TYPE, regex(pattern, ignore_case = TRUE)) |
    str_detect(EPISODE_NARRATIVE, regex(pattern, ignore_case = TRUE)) |
    str_detect(EVENT_NARRATIVE, regex(pattern, ignore_case = TRUE))
  )

flood_events_county_group <- flood_events_county %>%
  group_by(attributes.NAME, census_geometry) %>%
  summarise(total_events = n(), .groups = "drop") %>%
  arrange(desc(total_events))

# 6. Save to CSV if needed
# write_csv(flood_events_county_group, "county_flood_events.csv")

Event Counts Per County

library(ggplot2)
library(sf)
library(dplyr)

# Create the map
flood_map <- ggplot(flood_events_county_group) +
  geom_sf(aes(fill = total_events), color = "white", size = 0.2) +  # thinner county borders
  geom_sf_text(aes(label = attributes.NAME), size = 2.5, color = "white") +  # smaller font for county names
  scale_fill_viridis_c(option = "plasma", name = "Total Flood Events") +
  labs(
    title = "Total Flood Events by County",
    subtitle = "DMV Area",
    caption = "Source: Storm Data + Census Blockgroups"
  ) +
  theme_minimal(base_size = 10) +  # overall smaller base font
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5),
    legend.position = "right",
    panel.grid.major = element_line(color = "grey90", linetype = "dotted")
  ) +
  coord_sf(datum = NA)

# Display
print(flood_map)
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).

# Optional: Save the plot MUCH bigger
ggsave("/Users/andrewpaladino/Downloads/flood_events_county_map.png", plot = flood_map, width = 20, height = 15, dpi = 300)  # huge plot
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).

Line Plots / Timeseries Analysis of Event Group Per Block Groups Over Time

ggplot(bg_group, aes(x = YEAR, y = event_count, color = GIDBG, group = GIDBG)) +
  geom_line() +
  geom_point() +
  labs(title = "Event Count by GIDBG", x = "Year", y = "Event Count") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = 6)
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE))

ggplot(bg_group_flood, aes(x = YEAR, y = event_count, color = GIDBG, group = GIDBG)) +
  geom_line() +
  geom_point() +
  labs(title = "Event Count by GIDBG", x = "Year", y = "Event Count") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = 6)
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE))

Line Plots / Timeseries Analysis of Event Group Per Tracts Over Time

ggplot(tract_group, aes(x = YEAR, y = event_count, color = tract_code, group = tract_code)) +
  geom_line() +
  geom_point() +
  labs(title = "Event Count by Tract", x = "Year", y = "Event Count") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = 6)
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE))

ggplot(tract_group_flood, aes(x = YEAR, y = event_count, color = tract_code, group = tract_code)) +
  geom_line() +
  geom_point() +
  labs(title = "Event Count by Tract", x = "Year", y = "Event Count") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = 6)
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE))

# Line Plots / Timeseries Analysis of Event Group Per County Over Time

ggplot(county_group, aes(x = YEAR, y = event_count, color = county_code, group = county_code)) +
  geom_line() +
  geom_point() +
  labs(title = "Event Count by Tract", x = "Year", y = "Event Count") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = 6)
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE))

ggplot(county_group_flood, aes(x = YEAR, y = event_count, color = county_code, group = county_code)) +
  geom_line() +
  geom_point() +
  labs(title = "Event Count by County", x = "Year", y = "Event Count") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = 6)
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE))

Line Plots / Timeseries Analysis of Event Group Per State Over Time

ggplot(state_group, aes(x = YEAR, y = event_count, color = STATE, group = STATE)) +
  geom_line() +
  geom_point() +
  labs(title = "Event Count by Tract", x = "Year", y = "Event Count") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = 6)
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE))

ggplot(state_group_flood, aes(x = YEAR, y = event_count, color = STATE, group = STATE)) +
  geom_line() +
  geom_point() +
  labs(title = "Event Count by State", x = "Year", y = "Event Count") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = 6)
  ) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE))

Total Event Count vs. Flood Specific Event Count per County over Time

ggplot(sum_bg_group) +
  geom_sf(aes(fill = total_events), color = NA, size = 0.4) +
  scale_fill_viridis_c(option = "plasma", name = "Event Count") +
  labs(
    title = "Total Events per Block Group",
    subtitle = "GIDBG-level Choropleth",
    caption = "Data Source: Your Project"
  ) +
  theme_minimal()

ggplot(sum_bg_group_flood) +
  geom_sf(aes(fill = total_events), color = NA, size = 0.4) +
  scale_fill_viridis_c(option = "plasma", name = "Event Count") +
  labs(
    title = "Total Events per Block Group",
    subtitle = "GIDBG-level Choropleth",
    caption = "Data Source: Your Project"
  ) +
  theme_minimal()

Creating plot to include in the animation in the below cell, both are which we did not use in the final report

#install.packages("gganimate")
#install.packages("transformr")  # needed for transitions
#install.packages("gifski")
#install.packages("png")
#install.packages("av")
#install.packages("ggspatial")
#install.packages("rosm")

library(ggplot2)
library(gganimate)
library(sf)


all_events_ani <- ggplot(bg_group) +
  geom_sf(data = bg_total_geo, fill = NA, color = "white", size = 0.2) +  # <- outline layer
  geom_sf(aes(fill = event_count), color = NA) +
  scale_fill_viridis_c(option = "plasma", name = "Event Count") +
  labs(
    title = 'Event Count by Block Group — Year: {current_frame}',
    subtitle = "Animated Choropleth",
    caption = "Data Source: Your Project"
  ) +
  theme_minimal() +
  transition_manual(frames = YEAR)



flood_events_ani <- ggplot(bg_group_flood) +
  geom_sf(data = bg_total_geo, fill = NA, color = "grey", size = 0.05) +  # <- outline layer
  geom_sf(aes(fill = event_count), color = NA) +
  scale_fill_viridis_c(option = "plasma", name = "Event Count") +
  labs(
    title = 'Flood Event Count by Block Group — Year: {current_frame}',
    subtitle = "Animated Choropleth",
    caption = "Data Source: Your Project"
  ) +
  theme_minimal() +
  transition_manual(frames = YEAR)

Animation of Flood Event Counts per Block Group, which we did not include in the Report

#animate(all_events_ani, width = 1000, height = 800, fps = 2, renderer = gifski_renderer())
#animate(flood_events_ani, width = 1000, height = 800, fps = 2, renderer = gifski_renderer())

Mapping FLOOD DAMAGE PROPERTY per block group ~ did not include in final report / analysis

# 1. Filter non-missing DAMAGE_PROPERTY
flood_events_damage <- flood_events %>%
  filter(!is.na(DAMAGE_PROPERTY))

# 2. Convert DAMAGE_PROPERTY to numeric (handles K, M, B)
flood_events_damage <- flood_events_damage %>%
  mutate(
    DAMAGE_PROPERTY_NUM = case_when(
      str_detect(DAMAGE_PROPERTY, "K") ~ as.numeric(str_replace(DAMAGE_PROPERTY, "K", "")) * 1e3,
      str_detect(DAMAGE_PROPERTY, "M") ~ as.numeric(str_replace(DAMAGE_PROPERTY, "M", "")) * 1e6,
      str_detect(DAMAGE_PROPERTY, "B") ~ as.numeric(str_replace(DAMAGE_PROPERTY, "B", "")) * 1e9,
      TRUE ~ as.numeric(DAMAGE_PROPERTY)
    )
  )
## Warning: There were 4 warnings in `stopifnot()`.
## The first warning was:
## ℹ In argument: `DAMAGE_PROPERTY_NUM = case_when(...)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 3 remaining warnings.
# 3. Group and summarize, then remove zero-damage geometries
flood_events_damage_group <- flood_events_damage %>%
  group_by(GIDBG, census_geometry) %>%
  summarise(sumtotal_damage = sum(DAMAGE_PROPERTY_NUM, na.rm = TRUE), .groups = "drop") %>%
  filter(sumtotal_damage > 0)  # <- remove zeros

# 4. Sort descending by damage
flood_events_damage_group <- flood_events_damage_group %>%
  arrange(desc(sumtotal_damage))

ggplot(flood_events_damage_group) +
  geom_sf(aes(fill = sumtotal_damage), color = NA, size = 10) +
  scale_fill_viridis_c(option = "plasma", name = "Damage Totals") +
  labs(
    title = "Total Property Damage per Block Group",
    subtitle = "GIDBG-level Choropleth",
    caption = "Data Source: Your Project"
  ) +
  theme_minimal()

Mapping Flood Damage Crops per block group over time, this analysis was also not included in final report / analysis

# 1. Filter non-missing DAMAGE_CROPS
flood_events_crop <- flood_events %>%
  filter(!is.na(DAMAGE_CROPS))

# 2. Convert DAMAGE_CROPS to numeric
flood_events_crop <- flood_events_crop %>%
  mutate(
    DAMAGE_CROPS_NUM = case_when(
      str_detect(DAMAGE_CROPS, "K") ~ as.numeric(str_replace(DAMAGE_CROPS, "K", "")) * 1e3,
      str_detect(DAMAGE_CROPS, "M") ~ as.numeric(str_replace(DAMAGE_CROPS, "M", "")) * 1e6,
      str_detect(DAMAGE_CROPS, "B") ~ as.numeric(str_replace(DAMAGE_CROPS, "B", "")) * 1e9,
      TRUE ~ as.numeric(DAMAGE_CROPS)
    )
  )
## Warning: There were 4 warnings in `stopifnot()`.
## The first warning was:
## ℹ In argument: `DAMAGE_CROPS_NUM = case_when(...)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 3 remaining warnings.
# 3. Group by GIDBG and census_geometry, summarize total crop damage, remove zeros
flood_events_crop_group <- flood_events_crop %>%
  group_by(GIDBG, census_geometry) %>%
  summarise(sumtotal_crop_damage = sum(DAMAGE_CROPS_NUM, na.rm = TRUE), .groups = "drop") %>%
  filter(sumtotal_crop_damage > 0)

# 4. Sort descending
flood_events_crop_group <- flood_events_crop_group %>%
  arrange(desc(sumtotal_crop_damage))

ggplot(flood_events_crop_group) +
  geom_sf(aes(fill = sumtotal_crop_damage), color = NA, size = 0.1) +
  scale_fill_viridis_c(option = "plasma", name = "Crop Damage Totals") +
  labs(
    title = "Total Crop Damage per Block Group",
    subtitle = "GIDBG-level Choropleth",
    caption = "Data Source: Your Project"
  ) +
  theme_minimal()

Mapping Magnitude of flood events per block group over time.

# 1. Filter non-missing MAGNITUDE
flood_events_mag <- flood_events %>%
  filter(!is.na(MAGNITUDE))

# 2. Group and summarize average magnitude
flood_events_mag_group <- flood_events_mag %>%
  group_by(GIDBG, census_geometry) %>%
  summarise(avg_magnitude = mean(MAGNITUDE, na.rm = TRUE), .groups = "drop") %>%
  filter(avg_magnitude > 0)

# 3. Sort descending
flood_events_mag_group <- flood_events_mag_group %>%
  arrange(desc(avg_magnitude))

ggplot(flood_events_mag_group) +
  geom_sf(aes(fill = avg_magnitude), color = NA, size = 0.1) +
  scale_fill_viridis_c(option = "plasma", name = "Avg. Magnitude") +
  labs(
    title = "Average Event Magnitude per Block Group",
    subtitle = "GIDBG-level Choropleth",
    caption = "Data Source: Your Project"
  ) +
  theme_minimal()

Computing Summary Tables Total Events using aggregation functions

library(dplyr)

# Grouping by full granularity: state, county, tract, block group
total_events_block <- census_stormdata %>%
  group_by(STATE_MAPPED, attributes.STATE, County_Name, attributes.COUNTY, attributes.TRACT, attributes.BLKGRP) %>%
  summarise(total_event_count_per_block = n(), .groups = "drop") %>%
  arrange(desc(total_event_count_per_block))
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
# Grouping by tract
total_events_tracks <- census_stormdata %>%
  group_by(STATE_MAPPED, attributes.STATE, County_Name, attributes.COUNTY, attributes.TRACT) %>%
  summarise(total_event_count_per_tract = n(), .groups = "drop") %>%
  arrange(desc(total_event_count_per_tract))
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
# Grouping by county
total_events_county <- census_stormdata %>%
  group_by(STATE_MAPPED, attributes.STATE, County_Name, attributes.COUNTY) %>%
  summarise(total_event_count_per_county = n(), .groups = "drop") %>%
  arrange(desc(total_event_count_per_county))
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
# Grouping by state only
total_events_state <- census_stormdata %>%
  group_by(STATE_MAPPED, attributes.STATE) %>%
  summarise(total_event_count_per_state = n(), .groups = "drop") %>%
  arrange(desc(total_event_count_per_state))
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
total_events_block
## Simple feature collection with 6758 features and 7 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -84.63 ymin: 36.27 xmax: -72.08 ymax: 39.997
## Geodetic CRS:  WGS 84
## # A tibble: 6,758 × 8
##    STATE_MAPPED attributes.STATE County_Name  attributes.COUNTY attributes.TRACT
##    <chr>                   <dbl> <chr>                    <dbl> <chr>           
##  1 <NA>                       NA <NA>                        NA <NA>            
##  2 VIRGINIA                   51 Richmond Ci…               760 040300          
##  3 VIRGINIA                   51 Campbell                    31 020800          
##  4 VIRGINIA                   51 Amelia                       7 930200          
##  5 VIRGINIA                   51 Hampton City               650 011500          
##  6 VIRGINIA                   51 Pittsylvania               143 010500          
##  7 VIRGINIA                   51 Buckingham                  29 930202          
##  8 VIRGINIA                   51 Halifax                     83 930301          
##  9 VIRGINIA                   51 Madison                    113 930101          
## 10 MARYLAND                   24 Montgomery                  31 700731          
## # ℹ 6,748 more rows
## # ℹ 3 more variables: attributes.BLKGRP <dbl>,
## #   total_event_count_per_block <int>, geometry <GEOMETRY [°]>
total_events_tracks
## Simple feature collection with 3305 features and 6 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -84.63 ymin: 36.27 xmax: -72.08 ymax: 39.997
## Geodetic CRS:  WGS 84
## # A tibble: 3,305 × 7
##    STATE_MAPPED attributes.STATE County_Name  attributes.COUNTY attributes.TRACT
##    <chr>                   <dbl> <chr>                    <dbl> <chr>           
##  1 VIRGINIA                   51 Buckingham                  29 930202          
##  2 <NA>                       NA <NA>                        NA <NA>            
##  3 VIRGINIA                   51 Pittsylvania               143 010500          
##  4 VIRGINIA                   51 Amelia                       7 930200          
##  5 VIRGINIA                   51 Rappahannock               157 950200          
##  6 VIRGINIA                   51 Nelson                     125 950300          
##  7 VIRGINIA                   51 Orange                     137 110105          
##  8 VIRGINIA                   51 Halifax                     83 930400          
##  9 VIRGINIA                   51 Culpeper                    47 930400          
## 10 VIRGINIA                   51 Richmond                   159 040100          
## # ℹ 3,295 more rows
## # ℹ 2 more variables: total_event_count_per_tract <int>,
## #   geometry <MULTIPOINT [°]>
total_events_county
## Simple feature collection with 159 features and 5 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -84.63 ymin: 36.27 xmax: -72.08 ymax: 39.997
## Geodetic CRS:  WGS 84
## # A tibble: 159 × 6
##    STATE_MAPPED attributes.STATE County_Name    attributes.COUNTY
##    <chr>                   <dbl> <chr>                      <dbl>
##  1 MARYLAND                   24 Montgomery                    31
##  2 VIRGINIA                   51 Fairfax                       59
##  3 MARYLAND                   24 Baltimore                      5
##  4 VIRGINIA                   51 Loudoun                      107
##  5 MARYLAND                   24 Frederick                     21
##  6 MARYLAND                   24 Prince Georges                33
##  7 MARYLAND                   24 Harford                       25
##  8 MARYLAND                   24 Anne Arundel                   3
##  9 VIRGINIA                   51 Pittsylvania                 143
## 10 VIRGINIA                   51 Albemarle                      3
## # ℹ 149 more rows
## # ℹ 2 more variables: total_event_count_per_county <int>,
## #   geometry <MULTIPOINT [°]>
total_events_state
## Simple feature collection with 4 features and 3 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -84.63 ymin: 36.27 xmax: -72.08 ymax: 39.997
## Geodetic CRS:  WGS 84
## # A tibble: 4 × 4
##   STATE_MAPPED attributes.STATE total_event_count_pe…¹                  geometry
##   <chr>                   <dbl>                  <int>          <MULTIPOINT [°]>
## 1 VIRGINIA                   51                  32853 ((-83.6 36.6), (-83.56 3…
## 2 MARYLAND                   24                  12529 ((-79.486 39.277), (-79.…
## 3 DISTRICT OF…               11                    533 ((-77.112 38.9303), (-77…
## 4 <NA>                       NA                    135 ((-84.63 36.42), (-83.5 …
## # ℹ abbreviated name: ¹​total_event_count_per_state

Creating Summary Table of Flood Events per census level over time

library(dplyr)

# Block group level (STATE, COUNTY, TRACT, BLOCK GROUP)
flood_events_blocks <- flood_events %>%
  group_by(STATE_MAPPED, attributes.STATE, County_Name, attributes.COUNTY, attributes.TRACT, attributes.BLKGRP) %>%
  summarise(`Flood Event Count Per Block Group` = n(), .groups = "drop") %>%
  arrange(desc(`Flood Event Count Per Block Group`))
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
# Tract level (STATE, COUNTY, TRACT)
flood_events_tracts <- flood_events %>%
  group_by(STATE_MAPPED, attributes.STATE, County_Name, attributes.COUNTY, attributes.TRACT) %>%
  summarise(`Flood Event Count per Tract` = n(), .groups = "drop") %>%
  arrange(desc(`Flood Event Count per Tract`))
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
# County level (STATE, COUNTY)
flood_events_counties <- flood_events %>%
  group_by(STATE_MAPPED, attributes.STATE, County_Name, attributes.COUNTY) %>%
  summarise(`Flood Event Count per County` = n(), .groups = "drop") %>%
  arrange(desc(`Flood Event Count per County`))
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
# State level (STATE)
flood_events_states <- flood_events %>%
  group_by(STATE_MAPPED, attributes.STATE) %>%
  summarise(`Flood Event Count per State` = n(), .groups = "drop") %>%
  arrange(desc(`Flood Event Count per State`))
## although coordinates are longitude/latitude, st_union assumes that they are
## planar
flood_events_blocks
## Simple feature collection with 3833 features and 7 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -83.4098 ymin: 36.5419 xmax: -75.0538 ymax: 39.7225
## Geodetic CRS:  WGS 84
## # A tibble: 3,833 × 8
##    STATE_MAPPED attributes.STATE County_Name  attributes.COUNTY attributes.TRACT
##    <chr>                   <dbl> <chr>                    <dbl> <chr>           
##  1 VIRGINIA                   51 Madison                    113 930101          
##  2 MARYLAND                   24 Prince Geor…                33 800605          
##  3 VIRGINIA                   51 Prince Will…               153 901305          
##  4 VIRGINIA                   51 Fairfax                     59 481900          
##  5 VIRGINIA                   51 Greene                      79 030101          
##  6 VIRGINIA                   51 Giles                       71 930400          
##  7 VIRGINIA                   51 Halifax                     83 930301          
##  8 VIRGINIA                   51 Madison                    113 930202          
##  9 VIRGINIA                   51 Prince Will…               153 901503          
## 10 VIRGINIA                   51 Fairfax                     59 440202          
## # ℹ 3,823 more rows
## # ℹ 3 more variables: attributes.BLKGRP <dbl>,
## #   `Flood Event Count Per Block Group` <int>, geometry <MULTIPOINT [°]>
flood_events_tracts
## Simple feature collection with 2308 features and 6 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -83.4098 ymin: 36.5419 xmax: -75.0538 ymax: 39.7225
## Geodetic CRS:  WGS 84
## # A tibble: 2,308 × 7
##    STATE_MAPPED attributes.STATE County_Name  attributes.COUNTY attributes.TRACT
##    <chr>                   <dbl> <chr>                    <dbl> <chr>           
##  1 VIRGINIA                   51 Prince Will…               153 901305          
##  2 VIRGINIA                   51 Greene                      79 030101          
##  3 VIRGINIA                   51 Madison                    113 930202          
##  4 VIRGINIA                   51 Fairfax                     59 440202          
##  5 VIRGINIA                   51 Nelson                     125 950300          
##  6 VIRGINIA                   51 Greene                      79 030102          
##  7 VIRGINIA                   51 Madison                    113 930101          
##  8 VIRGINIA                   51 Giles                       71 930400          
##  9 VIRGINIA                   51 Frederick                   69 050300          
## 10 MARYLAND                   24 Prince Geor…                33 800605          
## # ℹ 2,298 more rows
## # ℹ 2 more variables: `Flood Event Count per Tract` <int>,
## #   geometry <MULTIPOINT [°]>
flood_events_counties
## Simple feature collection with 159 features and 5 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -83.4098 ymin: 36.5419 xmax: -75.0538 ymax: 39.7225
## Geodetic CRS:  WGS 84
## # A tibble: 159 × 6
##    STATE_MAPPED attributes.STATE County_Name    attributes.COUNTY
##    <chr>                   <dbl> <chr>                      <dbl>
##  1 VIRGINIA                   51 Fairfax                       59
##  2 MARYLAND                   24 Montgomery                    31
##  3 MARYLAND                   24 Baltimore                      5
##  4 MARYLAND                   24 Frederick                     21
##  5 VIRGINIA                   51 Prince William               153
##  6 MARYLAND                   24 Prince Georges                33
##  7 MARYLAND                   24 Harford                       25
##  8 VIRGINIA                   51 Pittsylvania                 143
##  9 VIRGINIA                   51 Albemarle                      3
## 10 VIRGINIA                   51 Loudoun                      107
## # ℹ 149 more rows
## # ℹ 2 more variables: `Flood Event Count per County` <int>,
## #   geometry <MULTIPOINT [°]>
flood_events_states
## Simple feature collection with 4 features and 3 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -83.4098 ymin: 36.5419 xmax: -75.0538 ymax: 39.7225
## Geodetic CRS:  WGS 84
## # A tibble: 4 × 4
##   STATE_MAPPED attributes.STATE Flood Event Count pe…¹                  geometry
##   <chr>                   <dbl>                  <int>          <MULTIPOINT [°]>
## 1 VIRGINIA                   51                   7848 ((-83.4098 36.6675), (-8…
## 2 MARYLAND                   24                   3143 ((-79.4843 39.2985), (-7…
## 3 DISTRICT OF…               11                    167 ((-77.1075 38.9249), (-7…
## 4 <NA>                       NA                      9 ((-80.8834 37.3835), (-8…
## # ℹ abbreviated name: ¹​`Flood Event Count per State`

Piecing it all together !!!

Here we wrote the logic in python to join hazard layers into our table of flood events + block groups. We then computed an aggregation so that on a per block group basis we had total storm event counts and total mapped areas (square meters of flood zones). This logic is included in the commented out section of code below. We imported the resulting table (final table summary) into this environment, where we created a vulnerability score into our table and then enriched our table with block group demographics to analyze socialeconomic trends.

library(sf)
library(dplyr)
library(readr)
library(tidyr)

# 
# # Python logic prior to import
# import pandas as pd
# import geopandas as geo
# from shapely.wkt import loads
# 
# # 1. Load flood hazard layer
# path = '/Users/andrewpaladino/Documents/xyzeus/data/flood_insurance_hazard_all.csv'
# flood_hazard = pd.read_csv(path)
# 
# 
# flood_hazard = flood_hazard[flood_hazard['properties.SFHA_TF'] == 'T']
# flood_hazard['geometry'] = flood_hazard['geometry'].apply(lambda x: loads(x) if pd.notnull(x) else None)
# flood_hazard_geo = geo.GeoDataFrame(flood_hazard, geometry='geometry', crs='EPSG:4326')
# flood_hazard_geo = flood_hazard_geo.drop(columns=['index_left', 'index_right'], errors='ignore')
# flood_events = flood_events.drop(columns=['index_left', 'index_right'], errors='ignore')
# 
# flood_events_flood_hazard = geo.sjoin(flood_events, flood_hazard_geo, how='left', predicate='within')
# flood_summary = flood_events_flood_hazard.groupby(['GIDBG', 'census_geometry']).agg(
#     total_flood_events=('geometry', 'count'),                     
#     unique_flood_zones=('properties.DFIRM_ID', 'nunique'),         
#     unique_zone_types=('properties.FLD_ZONE', lambda x: list(x.dropna().unique())),
#     unique_flooding_risk_types=('properties.esri_symbology', lambda x: list(x.dropna().unique()))
# ).reset_index()
# 
# flood_summary_gdf = geo.GeoDataFrame(
#     flood_summary,
#     geometry=flood_summary['census_geometry'],
#     crs='EPSG:4326'
# ).drop(columns='census_geometry')
# 
# 
# flood_hazard_geo_proj = flood_hazard_geo.to_crs(epsg=6933)
# flood_summary_gdf_proj = flood_summary_gdf.to_crs(epsg=6933)
# flood_hazard_clipped = geo.overlay(flood_hazard_geo_proj, flood_summary_gdf_proj, how='intersection')
# flood_hazard_clipped['clipped_area'] = flood_hazard_clipped.geometry.area  # Now meters²!
# flood_zone_area_summary = flood_hazard_clipped.groupby('GIDBG').agg(
#     total_flood_zone_area=('clipped_area', 'sum')
# ).reset_index()
# 
# final_summary = flood_summary_gdf.merge(flood_zone_area_summary, on='GIDBG', how='left')
# final_summary['total_flood_zone_area'] = final_summary['total_flood_zone_area'].fillna(0)
# 
# print(final_summary.head())
# final_summary.to_csv('final_table_summary.csv')



fh_path <- "/Users/andrewpaladino/Documents/xyzeus/data/flood_insurance_hazard_all.csv"
final_summary_path = '/Users/andrewpaladino/Documents/xyzeus/final_table_summary.csv'
flood_hazard <- read_csv(fh_path)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 43000 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): type, properties.DFIRM_ID, properties.VERSION_ID, properties.FLD_A...
## dbl  (8): ...1, id, properties.OBJECTID, properties.STATIC_BFE, properties.D...
## lgl  (2): properties.SFHA_TF, properties.VEL_UNIT
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
final_summary = read_csv(final_summary_path)
## New names:
## Rows: 3832 Columns: 8
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): unique_zone_types, unique_flooding_risk_types, geometry dbl (5): ...1,
## GIDBG, total_flood_events, unique_flood_zones, total_flood_zo...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
library(readr)
library(dplyr)
library(ggplot2)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(caret)   # for preProcess
## Loading required package: lattice
library(ggpmisc) # for OLS trendline with equation
## Loading required package: ggpp
## Registered S3 methods overwritten by 'ggpp':
##   method                  from   
##   heightDetails.titleGrob ggplot2
##   widthDetails.titleGrob  ggplot2
## 
## Attaching package: 'ggpp'
## 
## The following object is masked from 'package:ggplot2':
## 
##     annotate
dc_attr <- read_csv("/Users/andrewpaladino/Downloads/dc_attribution.csv")
## New names:
## Rows: 571 Columns: 48
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): State_name, County_name, county, tract dbl (44): ...1, GIDBG,
## pct_Female_No_SP_CEN_2020, pct_Females_CEN_2020, pct_...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
md_attr <- read_csv("/Users/andrewpaladino/Downloads/md_attribution.csv")
## New names:
## Rows: 4079 Columns: 48
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): State_name, County_name, county, tract dbl (44): ...1, GIDBG,
## pct_Female_No_SP_CEN_2020, pct_Females_CEN_2020, pct_...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
va_attr <- read_csv("/Users/andrewpaladino/Downloads/va_attribution.csv")
## New names:
## Rows: 5963 Columns: 48
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): State_name, County_name, county, tract dbl (44): ...1, GIDBG,
## pct_Female_No_SP_CEN_2020, pct_Females_CEN_2020, pct_...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
total_attr <- bind_rows(dc_attr, md_attr, va_attr)

attributions_final_merge <- final_summary %>%
  left_join(total_attr, by = "GIDBG")

# Normalize total_flood_events and total_flood_zone_area
norm_vars <- attributions_final_merge %>%
  select(total_flood_events, total_flood_zone_area)

preproc <- preProcess(norm_vars, method = c("range"))  # Min-Max Scaling
norm_scores <- predict(preproc, norm_vars)

# Add normalized scores and vulnerability score
attributions_final_merge <- attributions_final_merge %>%
  mutate(
    event_score = norm_scores$total_flood_events,
    area_score = norm_scores$total_flood_zone_area,
    vulnerability_score = event_score * (1 - area_score)
  )

# Plot 1: Vulnerability vs Median Household Income
ggplot(attributions_final_merge, aes(x = vulnerability_score, y = Med_HHD_Inc_BG_ACS_17_21, color = Med_HHD_Inc_BG_ACS_17_21)) +
  geom_point(alpha = 0.7) +
  scale_color_viridis_c(option = "plasma") +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  labs(
    title = "Vulnerability Score vs Median Household Income",
    x = "Vulnerability Score",
    y = "Median Household Income"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 194 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 194 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Plot 2: Vulnerability vs % Mobile Homes
ggplot(attributions_final_merge, aes(x = vulnerability_score, y = pct_Mobile_Homes_ACS_17_21, color = pct_Mobile_Homes_ACS_17_21)) +
  geom_point(alpha = 0.7) +
  scale_color_viridis_c(option = "plasma") +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  labs(
    title = "Vulnerability Score vs % Mobile Homes",
    x = "Vulnerability Score",
    y = "% Mobile Homes"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 39 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).